Analysis date: 2020-01-10

Setup

Load libraries

library(plyr)
library(gtools)
library(openxlsx)
library(pheatmap)
library(reshape2)
library(progress)
library(Matrix)
library(Hmisc)
library(lemon)
library(ggpubr)
library(effsize)
library(ggbeeswarm)
library(ggfortify)
library(ggpmisc)
library(ggrepel)
library(readxl)
library(DESeq2)
library(TOSTER)
library(tidyverse)
library(vsn)
library(fdrtool)
library(limma)
library(apeglm)
library(IHW)
library(Rtsne)
library(biomartr)
library(biomaRt)
library(MultiAssayExperiment)
library(PMA)
library(gplots)
library(RColorBrewer)
library(grid)
library(ConsensusClusterPlus)
library(survival)
library(survminer)
library(cowplot)

Load data

source("/Volumes/sd17b003/Sophie/Analysis/Screen_analysis/Figure_layouts.R")
load("/Volumes/sd17b003/Sophie/Analysis/CLL_Proteomics/CLL_Proteomics_final/Proteomics_Git/Robjects/CLL_Proteomics_Setup.RData")
load("/Volumes/sd17b003/Sophie/Analysis/CLL_Proteomics/CLL_Proteomics_final/Proteomics_Git/Robjects/CLL_Proteomics_ConsensusClustering.RData")
colData(multiomics_MAE)$PG <- as.factor(CCP_group5[rownames(colData(multiomics_MAE))])
colData(multiomics_MAE)$CCP6_RNA <- as.factor(CCP_group6_RNA[rownames(colData(multiomics_MAE))])

Analysis

Associations of protein abundance with genetic alterations

Wilcox test

Function

wilcox_proteins_any <- function(g, alteration, plot=FALSE, output=TRUE){
  if("SNPs" %in% (experiments(multiomics_MAE[alteration,,]) %>% names()) ){
    ty="SNPs_"}else if("chrom_abber" %in% (experiments(multiomics_MAE[alteration,,]) %>% names() ) ){
      ty="chrom_abber_"}else if("health_record_bin" %in% (experiments(multiomics_MAE[alteration,,]) %>% names() ) ){
        ty="health_record_bin_"}
  dat_g <- wideFormat(multiomics_MAE[c(g , alteration),,] ) %>% as_tibble() 
  dat_g <- dat_g %>% mutate(alt =as.logical(get(paste0(ty, alteration), envir=as.environment(dat_g))))
  
  if(grepl("ENSG", g)){
    assay_data="RNAseq_norm_"
    cap <- metadata(multiomics_MAE)$gene_symbol_mapping %>% filter(ensembl_gene_id==g) %>% .$hgnc_symbol
    col_b <- "#92c5de"
    col_r <- "#f4a582"
    cap_y <- paste("Transcript abundance", cap)
    }else{
    assay_data="proteomics_"
    cap <- g
    col_b <- "#0571b0"
    col_r <- "#ca0020"
    cap_y <- paste("Protein abundance", cap)
  }
  
  wt <- dat_g %>% filter(alt==0) %>% .[,paste0(assay_data, g)] %>% unlist
  mut <- dat_g %>% filter(alt==1) %>% .[,paste0(assay_data, g)] %>% unlist
  alt_lab <- alteration
  alt_lab <- gsub("_mutated", "", alt_lab)
  wt_lab <- "wt"
  mut_lab <- "mut"
  if(alt_lab=="IGHV"){
    wt_lab="U-CLL"
  mut_lab="M-CLL"}
  
  wx.test <- wilcox.test(wt, mut)
  if(plot==TRUE){
    p <- dat_g %>% filter(!is.na(alt )) %>% 
      ggplot(aes_string("alt", paste0(assay_data,g), fill="alt" )) + 
      geom_boxplot() + geom_beeswarm() + 
      xlab(alt_lab) +
      scale_x_discrete(labels=c(wt_lab, mut_lab)) +
      ylab(cap_y)+
      pp_sra + 
      scale_fill_manual(values = c(col_b,col_r)) 
      
    print(p+theme(aspect.ratio=2)+ theme(legend.position = 'none') +
      stat_compare_means(method = "wilcox", label = "p.signif", 
                         comparisons = list(c("TRUE","FALSE"))) )
    print(paste("pvalue",wx.test$p.value) )
  }
  if(output==TRUE){return(c(g,  alteration, length(wt), length(mut), wx.test$p.value))
    }else if(all(plot==TRUE, output=="plot")){return(p)}
}
ZAP70_P_plot <- wilcox_proteins_any(g="ZAP70", plot = TRUE, output = "plot", alteration="IGHV_mutated")

## [1] "pvalue 7.77315882449946e-07"
ZAP70_R_plot <- wilcox_proteins_any(g="ENSG00000115085", plot = TRUE, output = "plot", alteration="IGHV_mutated")

## [1] "pvalue 2.57528421801168e-07"
wilcox_proteins_any(g="BTK", plot = TRUE, alteration="IGHV_mutated")

## [1] "pvalue 0.423423159867772"
## [1] "BTK"               "IGHV_mutated"      "33"               
## [4] "42"                "0.423423159867772"
wilcox_proteins_any(g="BTK", plot = TRUE, alteration="trisomy12")

## [1] "pvalue 0.00018643349672198"
## [1] "BTK"                 "trisomy12"           "56"                 
## [4] "21"                  "0.00018643349672198"
ATM_P_plot <- wilcox_proteins_any(g="ATM", plot = TRUE, output = "plot", alteration="ATM")

## [1] "pvalue 0.00103072061207342"
ATM_R_plot <- wilcox_proteins_any(g="ENSG00000149311", plot = TRUE, output = "plot", alteration="ATM")

## [1] "pvalue 0.172492816496971"
TP53_P_plot <- wilcox_proteins_any(g="TP53", plot = TRUE, output = "plot", alteration="TP53")

## [1] "pvalue 0.000117833904429591"
TP53_R_plot <- wilcox_proteins_any(g="ENSG00000141510", plot = TRUE, output = "plot", alteration="TP53")

## [1] "pvalue 0.00502557822614873"
SF3B1_P_plot <- wilcox_proteins_any(g="SF3B1", plot = TRUE, alteration="SF3B1", output = "plot")

## [1] "pvalue 0.248991438694799"
wilcox_proteins_any(g="ENSG00000115524", plot = TRUE, alteration="TP53")

## [1] "pvalue 0.0844632924013222"
## [1] "ENSG00000115524"    "TP53"               "67"                
## [4] "13"                 "0.0844632924013222"
XPO1_P_plot <- wilcox_proteins_any(g="XPO1", plot = TRUE, output = "plot", alteration="XPO1")

## [1] "pvalue 0.00378833710722129"
XPO1_R_plot <- wilcox_proteins_any(g="ENSG00000082898", plot = TRUE, output = "plot", alteration="XPO1")

## [1] "pvalue 0.182153110588958"
IGHM_P_plot <- wilcox_proteins_any(g="IGHM", plot = TRUE, alteration="IGHV_mutated", output = "plot")

## [1] "pvalue 0.000898156918617089"

IGHM and ZAP70 levels only within PG5

PG5_IGHV_ZAP60IGHM_scatter <- wideFormat(multiomics_MAE[c("ZAP70", "IGHM" , "IGHV_mutated"),,], colDataCols = "PG" ) %>% as_tibble() %>%
  filter(PG==5, health_record_bin_IGHV_mutated %in% c(1,0) ) %>%
  mutate(IGHV=as.factor(if_else(health_record_bin_IGHV_mutated==0, "U-CLL", "M-CLL"))) %>%
  ggplot(aes(proteomics_ZAP70, proteomics_IGHM, color=IGHV)) +
  geom_point() + pp_sra +
  scale_color_manual(values = c("#ca0020", "#0571b0")) +
  xlab("Protein abundance ZAP70") +
  ylab("Protein abundance IGHM") 
## ExperimentList contains data.frame or DataFrame,
##   potential for errors with mixed data types
PG5_IGHV_ZAP60IGHM_scatter

Chromosome position and protein abundance

plot_chromosome_theme <- list(
    coord_cartesian(ylim=c(-0.7,0.7)),
    facet_wrap(~paste("chromosome",chromosome_name), scales = "free_x"),
    ylab("log2 norm. protein abundance"),
    xlab("Protein location on chromosome"),
    scale_color_manual(values=c("#0571b0", "#ca0020", "grey"))
)
Chr12_P_plot <- proteomics_tbl_meta_biomart %>%
  filter( !is.na(value), 
          chromosome_name %in% c("12")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_trisomy12), span=0.5, method = "loess") +
  plot_chromosome_theme +
  pp_sra +
  ggtitle("trisomy12") +
  geom_rect(xmin = 0, ymin=-0.68, ymax=0.68, xmax=133275309, color="gray40", size=1.5,  fill=NA)
Chr12_P_plot +  theme(aspect.ratio=0.4, legend.position = 'none') 

Chr11_P_plot <- proteomics_tbl_meta_biomart %>%
  filter( !is.na(value), 
          chromosome_name %in% c("11")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del11q), span=0.5, method = "loess") +
  plot_chromosome_theme +
  pp_sra_noguides +
  ggtitle("del11q")+
  geom_rect(xmin = 97400000, ymin=-0.68, ymax=0.68, xmax=110600000, color="gray40", size=1.5,  fill=NA)
Chr11_P_plot + theme(aspect.ratio=0.4, legend.position = 'none') 

Chr13_P_plot <- proteomics_tbl_meta_biomart %>%
  filter( !is.na(value), 
          chromosome_name %in% c("13")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del13q14), span=0.5, method = "loess") +
  plot_chromosome_theme +
  pp_sra_noguides +
  ggtitle("del13q")+
  geom_rect(xmin = 47000000,ymin=-0.68, ymax=0.68, xmax=51000000, color="gray40", size=1.5,  fill=NA)
Chr13_P_plot + theme(aspect.ratio=0.4, legend.position = 'none') 

Chr17_P_plot <- proteomics_tbl_meta_biomart %>%
  filter( !is.na(value), 
          chromosome_name %in% c("17")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del17p13), span=0.5, method = "loess") +
  plot_chromosome_theme +
  pp_sra +
  ggtitle("del17p")+
  geom_rect(xmin = 0, ymin=-0.68, ymax=0.68, xmax=10800000, color="gray40", size=1.5,  fill=NA)
Chr17_P_plot + theme(aspect.ratio=0.4, legend.position = 'none') 

Chr8_P_plot <- proteomics_tbl_meta_biomart %>%
  filter( !is.na(value), 
          chromosome_name %in% c("8")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_gain8q24), span=0.5, method = "loess") +
  plot_chromosome_theme +
  pp_sra +
  ggtitle("gain8q24")+
  geom_rect(xmin = 117700001, ymin=-0.68, ymax=0.68, xmax=146364022, color="gray40", size=1.5,  fill=NA)
Chr8_P_plot + theme(aspect.ratio=0.4, legend.position = 'none') 

Chromosome position and RNA

plot_chromosome_theme_RNA <- list(
    coord_cartesian(ylim=c(0.75,1.25)),
    facet_wrap(~paste("chromosome",chromosome_name), scales = "free_x"),
    ylab("log10 norm. RNA counts"),
    xlab("Gene location on chromosome"),
    scale_color_manual(values=c("#92c5de", "#f4a582", "grey"))
)

RNA_biomart<- left_join(
  left_join((longFormat(multiomics_MAE[,,"RNAseq_norm"] ) %>% as_tibble()), 
                                         metadata(multiomics_MAE)$gene_symbol_mapping[c(1,2,3,5:7)], 
                                         by=c("rowname"="ensembl_gene_id")),
  proteomics_tbl_meta_biomart_chrab, by=c("primary"))
## ExperimentList contains data.frame or DataFrame,
##   potential for errors with mixed data types
## harmonizing input:
##   removing 22 colData rownames not in sampleMap 'primary'
RNA_biomart <- left_join( left_join(RNA_biomart,
          proteomics_tbl_meta_biomart_SNP, by=c("primary")),
          proteomics_tbl_meta_biomart_health, by=c("primary"))
# Trisomy 12
Chr12_R_plot <- RNA_biomart %>%
  filter( !is.na(value), 
          chromosome_name %in% c("12")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_trisomy12), span=0.5, method = "loess") +
  plot_chromosome_theme_RNA +
  pp_sra +
  ggtitle("trisomy12") +
  coord_cartesian(ylim=c(0.95,1.1)) +
  geom_rect(xmin = 0, ymin=0.95, ymax=1.1, xmax=133275309, color="gray40", size=1.5,  fill=NA)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Chr12_R_plot + theme(aspect.ratio=0.4, legend.position = 'none') 

# del11q
Chr11_R_plot <- RNA_biomart %>%
  filter( !is.na(value), 
          chromosome_name %in% c("11")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del11q), span=0.5, method = "loess") +
  plot_chromosome_theme_RNA +
  pp_sra_noguides +
  ggtitle("del11q")+
  coord_cartesian(ylim=c(0.95,1.1)) +
  geom_rect(xmin = 97400000, ymin=0.95, ymax=1.1, xmax=110600000, color="gray40", size=1.5,  fill=NA)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Chr11_R_plot + theme(aspect.ratio=0.4, legend.position = 'none') 

# del13q
Chr13_R_plot <- RNA_biomart %>%
  filter( !is.na(value), 
          chromosome_name %in% c("13")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del13q14), span=0.5, method = "loess") +
  plot_chromosome_theme_RNA +
  pp_sra_noguides +
  ggtitle("del13q")+
  coord_cartesian(ylim=c(0.95,1.1)) +
  geom_rect(xmin = 47000000, ymin=0.95, ymax=1.1, xmax=51000000, color="gray40", size=1.5,  fill=NA)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Chr13_R_plot + theme(aspect.ratio=0.4, legend.position = 'none') 

# del17p
Chr17_R_plot <- RNA_biomart %>%
  filter( !is.na(value), 
          chromosome_name %in% c("17")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_del17p13), span=0.5, method = "loess") +
  plot_chromosome_theme_RNA +
  pp_sra +
  ggtitle("del17p")+
  coord_cartesian(ylim=c(0.95,1.1)) +
  geom_rect(xmin = 0, ymin=0.95, ymax=1.1, xmax=10800000, color="gray40", size=1.5,  fill=NA)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Chr17_R_plot + theme(aspect.ratio=0.4, legend.position = 'none') 

Chr8_R_plot <- RNA_biomart %>%
  filter( !is.na(value), 
          chromosome_name %in% c("8")) %>%
  ggplot(aes(start_position, value, group=primary)) +
  geom_point(size=0.5, alpha=0.2, color="darkgrey") +
  stat_smooth(geom='line', alpha=0.5, se=FALSE, aes(color=chrom_abber_gain8q24), span=0.5, method = "loess") +
  plot_chromosome_theme_RNA +
  pp_sra +
  ggtitle("gain8q24")+
  coord_cartesian(ylim=c(0.95,1.1)) +
  geom_rect(xmin = 117700001, ymin=0.95, ymax=1.1, xmax=146364022, color="gray40", size=1.5,  fill=NA)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
Chr8_R_plot + theme(aspect.ratio=0.4, legend.position = 'none') 

Session Info

sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] cowplot_1.0.0               survminer_0.4.6            
##  [3] ConsensusClusterPlus_1.46.0 RColorBrewer_1.1-2         
##  [5] gplots_3.0.1.1              PMA_1.1                    
##  [7] MultiAssayExperiment_1.8.3  biomaRt_2.38.0             
##  [9] biomartr_0.9.0              Rtsne_0.15                 
## [11] IHW_1.10.1                  apeglm_1.4.2               
## [13] limma_3.38.3                fdrtool_1.2.15             
## [15] vsn_3.50.0                  forcats_0.4.0              
## [17] stringr_1.4.0               dplyr_0.8.3                
## [19] purrr_0.3.3                 readr_1.3.1                
## [21] tidyr_1.0.0                 tibble_2.1.3               
## [23] tidyverse_1.3.0             TOSTER_0.3.4               
## [25] DESeq2_1.22.2               SummarizedExperiment_1.12.0
## [27] DelayedArray_0.8.0          BiocParallel_1.16.6        
## [29] matrixStats_0.55.0          Biobase_2.42.0             
## [31] GenomicRanges_1.34.0        GenomeInfoDb_1.18.2        
## [33] IRanges_2.16.0              S4Vectors_0.20.1           
## [35] BiocGenerics_0.28.0         readxl_1.3.1               
## [37] ggrepel_0.8.1               ggpmisc_0.3.3              
## [39] ggfortify_0.4.8             ggbeeswarm_0.6.0           
## [41] effsize_0.7.6               ggpubr_0.2.4               
## [43] magrittr_1.5                lemon_0.4.3                
## [45] Hmisc_4.3-0                 ggplot2_3.2.1              
## [47] Formula_1.2-3               survival_3.1-8             
## [49] lattice_0.20-38             Matrix_1.2-18              
## [51] progress_1.2.2              reshape2_1.4.3             
## [53] pheatmap_1.0.12             openxlsx_4.1.4             
## [55] gtools_3.8.1                plyr_1.8.4                 
## 
## loaded via a namespace (and not attached):
##   [1] tidyselect_0.2.5       RSQLite_2.1.4          AnnotationDbi_1.44.0  
##   [4] htmlwidgets_1.5.1      munsell_0.5.0          codetools_0.2-16      
##   [7] preprocessCore_1.44.0  withr_2.1.2            colorspace_1.4-1      
##  [10] knitr_1.26             rstudioapi_0.10        ggsignif_0.6.0        
##  [13] labeling_0.3           slam_0.1-46            bbmle_1.0.20          
##  [16] GenomeInfoDbData_1.2.0 lpsymphony_1.10.0      KMsurv_0.1-5          
##  [19] bit64_0.9-7            farver_2.0.1           coda_0.19-3           
##  [22] vctrs_0.2.0            generics_0.0.2         xfun_0.11             
##  [25] R6_2.4.1               locfit_1.5-9.1         bitops_1.0-6          
##  [28] assertthat_0.2.1       scales_1.1.0           nnet_7.3-12           
##  [31] beeswarm_0.2.3         gtable_0.3.0           affy_1.60.0           
##  [34] rlang_0.4.2            zeallot_0.1.0          genefilter_1.64.0     
##  [37] splines_3.5.2          lazyeval_0.2.2         acepack_1.4.1         
##  [40] impute_1.56.0          broom_0.5.2            checkmate_1.9.4       
##  [43] BiocManager_1.30.10    yaml_2.2.0             modelr_0.1.5          
##  [46] backports_1.1.5        tools_3.5.2            affyio_1.52.0         
##  [49] ellipsis_0.3.0         Rcpp_1.0.3             base64enc_0.1-3       
##  [52] zlibbioc_1.28.0        RCurl_1.95-4.12        prettyunits_1.0.2     
##  [55] rpart_4.1-15           zoo_1.8-6              haven_2.2.0           
##  [58] cluster_2.1.0          fs_1.3.1               data.table_1.12.8     
##  [61] reprex_0.3.0           hms_0.5.2              evaluate_0.14         
##  [64] xtable_1.8-4           XML_3.98-1.20          emdbook_1.3.11        
##  [67] gridExtra_2.3          compiler_3.5.2         KernSmooth_2.23-16    
##  [70] crayon_1.3.4           htmltools_0.4.0        geneplotter_1.60.0    
##  [73] lubridate_1.7.4        DBI_1.0.0              dbplyr_1.4.2          
##  [76] MASS_7.3-51.4          cli_2.0.0              gdata_2.18.0          
##  [79] pkgconfig_2.0.3        km.ci_0.5-2            numDeriv_2016.8-1.1   
##  [82] foreign_0.8-72         xml2_1.2.2             annotate_1.60.1       
##  [85] vipor_0.4.5            XVector_0.22.0         rvest_0.3.5           
##  [88] digest_0.6.23          Biostrings_2.50.2      rmarkdown_1.18        
##  [91] cellranger_1.1.0       survMisc_0.5.5         htmlTable_1.13.3      
##  [94] curl_4.3               lifecycle_0.1.0        nlme_3.1-142          
##  [97] jsonlite_1.6           fansi_0.4.0            pillar_1.4.2          
## [100] httr_1.4.1             glue_1.3.1             zip_2.0.4             
## [103] bit_1.1-14             stringi_1.4.3          blob_1.2.0            
## [106] latticeExtra_0.6-28    caTools_1.17.1.3       memoise_1.1.0